package com.webull.information.center.carwler.common.util.jsoup.prase_en;

import java.io.IOException;
import java.text.ParseException;
import java.util.Date;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;

import org.apache.commons.lang3.RandomStringUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;

import com.webull.framework.util.UtilDate;
import com.webull.information.center.carwler.common.model.NewsInformation;
import com.webull.information.center.carwler.common.util.jsoup.HtmlBodyPrase;
import com.webull.information.center.carwler.common.util.jsoup.JsoupPraseUtil;
import com.webull.information.center.common.constants.Constants;

/**
 * BusinessWire: parser for press-release pages of the US business news wire (businesswire.com).
 * 
 * @author shimingjun
 * @date 2016年8月23日 下午1:06:02
 * @version 1.0
 * @since JDK 1.8
 */

public class Businesswire_HtmlPrase implements HtmlBodyPrase {
	protected final Logger logger = LogManager.getLogger(getClass());

	/**
	 * Phrase that marks the "correction notice" paragraph of a re-issued
	 * release; paragraphs containing it are dropped from the extracted body.
	 */
	private static final String CORRECTION_MARKER = "The corrected release reads:";

	/**
	 * Maps the language segment of a BusinessWire article URL to the
	 * project's language constant. The segment is the path element that
	 * follows the 14-digit release id, for example {@code en} in
	 * {@code http://www.businesswire.com/news/home/20160615005572/en/Market-Performance-Chinese-Listed-Port-Companies-2015}.
	 */
	private static final Map<String, String> langs = buildLangs();

	/** Builds the URL-segment to language-constant lookup table. */
	private static Map<String, String> buildLangs() {
		Map<String, String> res = new ConcurrentHashMap<>();
		res.put("zh", Constants.lang_zh);
		res.put("en", Constants.lang_en);
		return res;
	}

	/**
	 * Extracts language, title, source name, publish time and body content
	 * from a BusinessWire release page into {@code info}.
	 * <p>
	 * Nothing is written when the page has no
	 * {@code article.bw-release-main} element. Language and source name are
	 * only set when {@code info} does not already carry a non-blank value.
	 * Any exception raised while parsing is logged and not propagated, so a
	 * partially filled {@code info} is possible.
	 *
	 * @param doc  the parsed HTML page
	 * @param info the news record to fill in place
	 */
	@Override
	public void praseNewsInfo(org.jsoup.nodes.Document doc, NewsInformation info) {
		try {
			Element body = doc.select("article.bw-release-main").first();
			if (body == null) {
				return;
			}

			fillLanguage(doc, info);
			fillTitle(body, info);

			// sourceName: keep a value supplied by the caller, otherwise use the site default
			if (StringUtils.isBlank(info.getSourceName())) {
				info.setSourceName("Business Wire (press release)");
			}

			fillPublishTime(body, info);
			fillContent(body, info);
		} catch (Exception e) {
			// Pass the throwable separately so the stack trace is logged, not just toString().
			logger.warn("Failed to parse BusinessWire release page", e);
		}
	}

	/**
	 * Sets the language from the canonical link's URL segment when
	 * {@code info} has none yet, falling back to English.
	 */
	private void fillLanguage(org.jsoup.nodes.Document doc, NewsInformation info) {
		Element canonical = doc.select("head>link[rel=canonical]").first();
		String href = canonical == null ? null : StringUtils.stripToNull(canonical.attr("href"));
		if (href != null && StringUtils.isBlank(info.getLanguage())) {
			// e.g. http://www.businesswire.com/news/home/20150818005409/en/Vantage-mHealthcare-Announces-Banking-Agreement-China-Merchants
			String path = StringUtils.substringAfter(href, "news/home/");
			// drop the 14-digit release id so the language segment comes first
			path = StringUtils.replacePattern(path, "\\d{14}/", "");
			String segment = StringUtils.lowerCase(StringUtils.substringBefore(path, "/"));
			String mapped = langs.get(segment);
			if (StringUtils.isNotBlank(mapped)) {
				info.setLanguage(mapped);
			}
		}
		if (StringUtils.isBlank(info.getLanguage())) {
			// Same constant the lookup table uses for "en", so both paths agree.
			info.setLanguage(Constants.lang_en);
		}
	}

	/** Sets the title from the release header's {@code h1}, if it has text. */
	private void fillTitle(Element body, NewsInformation info) {
		Element heading = body.select("header>div>h1").first();
		if (heading == null) {
			return;
		}
		String title = StringUtils.stripToNull(heading.text());
		if (title != null) {
			info.setTitle(title);
		}
	}

	/**
	 * Stores the raw {@code datetime} attribute as push time and, when it
	 * can be parsed, the resulting {@link Date} as news time.
	 */
	private void fillPublishTime(Element body, NewsInformation info) {
		Element timeTag = body.select("div.bw-release-timestamp>time").first();
		String stamp = timeTag == null ? null : StringUtils.stripToNull(timeTag.attr("datetime"));
		if (stamp == null) {
			return;
		}
		// e.g. datetime="2016-06-15T10:12:00Z"
		info.setPushTime(stamp);
		try {
			String day = StringUtils.stripToEmpty(StringUtils.substringBefore(stamp, "T"));
			String time = StringUtils.stripToEmpty(
					StringUtils.substringBeforeLast(StringUtils.substringAfter(stamp, "T"), "Z"));

			// NOTE(review): the 0 argument is presumably a zero (UTC) time-zone offset - confirm against UtilDate.
			Date parsed = UtilDate.parse(day + " " + time, 0, "yyyy-MM-dd HH:mm:ss");
			if (parsed != null) {
				info.setNewsTime(parsed);
			}
		} catch (Exception e) {
			// Best effort: the raw push time is already stored, so only report the failure.
			logger.warn("Could not parse release datetime '{}'", stamp, e);
		}
	}

	/**
	 * Collects the paragraphs of the article body, drops centred
	 * ({@code bwalignc}), correction-notice and empty ones, and stores the
	 * remaining HTML as content.
	 */
	private void fillContent(Element body, NewsInformation info) {
		// news body
		Optional.ofNullable(body.select("div[itemprop=articleBody]").first())
				.map(article -> article.getElementsByTag("P"))
				.ifPresent(paragraphs -> {
					// iterate backwards so removals do not shift unvisited indices
					for (int i = paragraphs.size() - 1; i >= 0; i--) {
						Element paragraph = paragraphs.get(i);
						JsoupPraseUtil.trimParagraph(paragraph);
						// contains() instead of a ".*x.*" regex: '.' does not match the line
						// breaks that html() may emit, which made the regex miss the marker.
						if (paragraph.hasClass("bwalignc")
								|| paragraph.html().contains(CORRECTION_MARKER)
								|| !paragraph.hasText()) {
							paragraphs.remove(i);
						}
					}

					info.setContent(StringUtils.stripToNull(paragraphs.outerHtml()));
				});
	}

	/**
	 * Manual smoke test: downloads one sample release and prints the parsed
	 * result. Requires network access.
	 */
	public static void main(String[] args) throws ParseException, IOException {
		// Other sample pages:
		// http://www.businesswire.com/news/home/20160804005070/en/Technavio-Announces-Top-Vendors-Global-Transformer-Oil
		// http://www.businesswire.com/news/home/20121129005647/en/Research-Markets-China-Tourism-Industry-Report-2012-2014
		// http://www.businesswire.com/news/home/20111115006875/zh/
		// http://www.businesswire.com/news/home/20160615006272/en/Deutsche-Bank-Appointed-Depositary-Bank-Sponsored-Level
		String url2 = "http://www.businesswire.com/news/home/20110817006565/zh/";
		Connection connection = Jsoup.connect(url2).userAgent(
				"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36")
				.header("x-client-data", RandomStringUtils.randomAlphanumeric(40));

		org.jsoup.nodes.Document doc = connection.timeout(10000).get();
		NewsInformation info = new NewsInformation();
		new Businesswire_HtmlPrase().praseNewsInfo(doc, info);
		System.out.println(info);
	}
}
